package com.kd.crawler.http;

import java.io.IOException;
import java.net.MalformedURLException;
import java.util.logging.Level;
import java.util.logging.Logger;

import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
import com.gargoylesoftware.htmlunit.Page;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.kd.crawler.entity.CrawlerEntry;


/**
 * Fetches pages through an HtmlUnit {@link WebClient} (Chrome emulation,
 * JavaScript enabled, CSS disabled) and packages the outcome as an
 * {@link HttpResponse}.
 *
 * <p>Every request uses its own freshly configured {@link WebClient}, which is
 * closed when the request finishes, so a single {@code HttpWebClient} instance
 * can be used for any number of {@link #crawl(CrawlerEntry)} or
 * {@link #getHtml(String)} calls.
 *
 * @author Manfred
 */
public class HttpWebClient {
	private static final Logger LOGGER = Logger.getLogger(HttpWebClient.class.getName());

	/** Value handed to {@code WebClientOptions.setTimeout}, in milliseconds. */
	private static final int TIMEOUT_MILLIS = 5000;

	/**
	 * Creates a fetcher. No browser resources are allocated until a request is made.
	 */
	public HttpWebClient() {
		// Nothing to set up: each request builds and closes its own WebClient.
	}

	/**
	 * Builds a {@link WebClient} with the settings used for every request.
	 *
	 * @return a new, configured client that the caller must close
	 */
	private static WebClient newWebClient() {
		WebClient client = new WebClient(BrowserVersion.CHROME);
		client.getOptions().setJavaScriptEnabled(true);
		// Disable CSS so no follow-up stylesheet requests are made for rendering.
		client.getOptions().setCssEnabled(false);
		// Follow redirects.
		client.getOptions().setRedirectEnabled(true);
		// Do not throw when a script on the page fails.
		client.getOptions().setThrowExceptionOnScriptError(false);
		client.getOptions().setTimeout(TIMEOUT_MILLIS);
		return client;
	}

	/**
	 * Fetches {@code crawlerUrl.getUrl()} and stores the resulting HTML, status
	 * code and crawl time back into the same entry.
	 *
	 * @param crawlerUrl entry supplying the URL and receiving the results
	 */
	public void crawl(CrawlerEntry crawlerUrl) {
		HttpResponse response = getHtml(crawlerUrl.getUrl());
		crawlerUrl.setHtml(response.getHtml());
		crawlerUrl.setStatusCode(response.getStatus());
		crawlerUrl.setCrawlTime(response.getCrawlTime());
	}

	/**
	 * Fetches {@code url} and returns its content.
	 *
	 * <p>For an HTML page the content is the page serialized with
	 * {@code asXml()}; for any other page type it is the raw response body.
	 * When the server answers with a failing status code, that status and the
	 * error body are returned. When the URL is malformed or an I/O error
	 * occurs, the status is {@code 0} and the content is {@code null}. No
	 * explicit wait for background JavaScript is performed.
	 *
	 * @param url address to fetch
	 * @return status code, content and the time (from
	 *         {@link System#currentTimeMillis()}) at which the fetch finished
	 */
	public HttpResponse getHtml(String url) {
		int status = 0;
		String html = null;
		Page page = null;
		WebClient client = newWebClient();

		// A plain try/finally is used instead of try-with-resources so that the
		// catch blocks read the error response before the client is closed.
		try {
			page = client.getPage(url);
			status = page.getWebResponse().getStatusCode();
			if (page instanceof HtmlPage) {
				html = ((HtmlPage) page).asXml();
			} else {
				// Non-HTML responses come back as other Page types; assigning
				// them to HtmlPage would raise a ClassCastException.
				html = page.getWebResponse().getContentAsString();
			}
		} catch (FailingHttpStatusCodeException e) {
			status = e.getStatusCode();
			html = e.getResponse().getContentAsString();
			LOGGER.log(Level.WARNING, "Failing HTTP status " + status + " for " + url, e);
		} catch (MalformedURLException e) {
			LOGGER.log(Level.WARNING, "Malformed URL: " + url, e);
		} catch (IOException e) {
			LOGGER.log(Level.WARNING, "I/O error while fetching " + url, e);
		} finally {
			try {
				if (page != null) {
					page.cleanUp();
				}
			} finally {
				client.close();
			}
		}
		return new HttpResponse(status, html, System.currentTimeMillis());
	}

	/**
	 * Manual smoke test: fetches a fixed URL and prints the returned content.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		HttpWebClient httpUtils = new HttpWebClient();
		System.out.println(httpUtils.getHtml("http://www.ziltax.com/").getHtml());
	}
}
